Objective of this notebook

In this notebook, you will find common and useful visualizations of the data obtained in the Alkaloid extraction report.

Loading libraries

This section contains the libraries required to conduct the data analysis and visualization parts of this notebook.

library(tidyverse)
library(plotly)
library(DT)
library(factoextra)
library(FactoMineR)
library(patchwork)
library(GGally)

Importing data

Here, we are going to import the short version of the results provided by ms-mint.

# Load the short ms-mint results table (one row per sample file and peak).
alkaloids_area <- readr::read_csv(file = "Results/All_samples_fixed_rt.csv")

Now that we have our data in memory, we can display it.

## Rows: 3
## Columns: 6
## $ ...1          <dbl> 0, 1, 2
## $ ms_file_label <chr> "SPM_7_pos_20", "SPM_7_pos_20", "SPM_7_pos_20"
## $ peak_label    <chr> "Dehydrotomatine", "alpha-Tomatine", "Tomatidine"
## $ mz_mean       <dbl> 1034.5422, 1036.5613, 416.3531
## $ peak_area     <dbl> 1425829, 5211881, 0
## $ Sample        <chr> "SPM", "SPM", "SPM"

Formatting the table

As we can see, the ms_file_label column holds the concatenated sample name, which encodes the Species, Replicate, Polarity, and Injection number. The last column, Sample, shows the species acronym. At this point the table is not intuitive, so we are going to tidy it up.

Extracting useful information from the sample label (ms_file_label)

# Build a tidy alkaloid table from the raw ms-mint results.
# ms_file_label (e.g. "SPM_7_pos_20") is split into its four components; the
# original label is kept (remove = FALSE) so rows can be traced to their file.
alkaloids_clean <- alkaloids_area %>%
  separate(ms_file_label,
           into = c("Specie", "Replicate", "Polarity", "InjectionNumber"),
           remove = FALSE) %>%
  # A single select() renames and orders the columns; everything not listed
  # (Sample, Polarity and the unnamed index column `...1`) is dropped.
  select(ms_file_label, Specie, Replicate, InjectionNumber,
         TargetIon = mz_mean, Alkaloid = peak_label, Area = peak_area) %>%
  # Factor levels in numeric order (a character sort would put "10" before
  # "2"). Replicate values outside 1-10 become NA.
  mutate(Replicate = factor(Replicate, levels = seq_len(10)))

Now that we have a cleaner table, we can display it with more descriptive column names.

Remember to use the search bar to look for specific results.

Visualizations

Barplot - Absolute peak area

The first bar plot shows the absolute peak area quantified per sample and alkaloid.

# Stacked bars of absolute peak area: one bar per replicate, one panel per
# Specie, coloured by alkaloid. Rows whose Specie is "OH8243Fruit" are left out.
bar_alkaloids_absol <- ggplot(
  data = filter(alkaloids_clean, !Specie %in% "OH8243Fruit"),
  mapping = aes(x = Replicate, y = Area, fill = Alkaloid)
) +
  geom_col() +
  facet_wrap(facets = "Specie", ncol = 5) +
  theme_classic() +
  labs(
    title = "Barplot of peak area per alkaloid",
    x = "Replicate number",
    y = "Peak area"
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(bar_alkaloids_absol)

Barplot - Relative peak area

In contrast to the previous plot, where the bar height represents the area observed in each sample, the following plot represents the relative alkaloid content per sample. This plot is only meant for visual comparison within a sample, not between samples, since the bar heights are normalized.

# Same layout as the absolute barplot, but position = "fill" rescales every
# bar to a total height of 1 so only the alkaloid proportions are shown.
# Rows whose Specie is "OH8243Fruit" are left out.
bar_alkaloids_rel <- ggplot(
  data = filter(alkaloids_clean, !Specie %in% "OH8243Fruit"),
  mapping = aes(x = Replicate, y = Area, fill = Alkaloid)
) +
  geom_col(position = "fill") +
  facet_wrap(facets = "Specie", ncol = 5) +
  theme_classic() +
  labs(
    title = "Barplot of relative peak area per alkaloid",
    x = "Replicate number",
    y = "Relative peak area"
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(bar_alkaloids_rel)

PCA

The following section will be based on the following documentation.

# Reshape to one row per sample file and one column per alkaloid.
# TargetIon is dropped before pivoting; alkaloids with no row for a given
# sample are filled with an area of 0.
alkaloids_wide <- alkaloids_clean %>%
  filter(!Specie %in% "OH8243Fruit") %>%
  select(!TargetIon) %>%
  pivot_wider(
    names_from = Alkaloid,
    values_from = Area,
    values_fill = 0
  )

Exporting wide alkaloid data

# Save the wide table without the automatic row-name column.
write.csv(alkaloids_wide, file = "Results/wide_Alkaloids_allSamples.csv",
          row.names = FALSE)

Conducting PCA

Centered and scaled (FactoMineR's `PCA()` scales variables to unit variance by default)

# Keep only the alkaloid peak-area columns: the contiguous range from
# Dehydrotomatine to LycoEscu_rt2p4 in the wide table.
alkaloids_only <- select(alkaloids_wide, Dehydrotomatine:LycoEscu_rt2p4)

# scale.unit = TRUE and ncp = 5 are FactoMineR's defaults, written out here so
# it is visible that every alkaloid is centered AND scaled to unit variance and
# that five components are kept. graph = FALSE suppresses the automatic plots.
alkaloids_pca <- PCA(
  alkaloids_only,
  scale.unit = TRUE,
  ncp = 5,
  graph = FALSE
)

Eigenvalues/variances plot

# Scree plot with a percentage label on each bar; the y axis is fixed to 0-40.
alkaloids_pca %>%
  fviz_screeplot(addlabels = TRUE, ylim = c(0, 40))

Variable/Alkaloid contribution for dimensionality reduction

# Variable-level PCA results (coordinates, cos2, contributions).
vars_info <- get_pca_var(alkaloids_pca)

# Interactive table of each alkaloid's contribution to the first four
# components, sorted by decreasing contribution to PC1, then PC2, then PC3.
as.data.frame(vars_info$contrib) %>%
  select(PC1 = Dim.1, PC2 = Dim.2, PC3 = Dim.3, PC4 = Dim.4) %>%
  arrange(desc(PC1), desc(PC2), desc(PC3)) %>%
  DT::datatable()

Variable/Alkaloid contribution plot

# Percentage of variance explained by each component, read from the fitted
# PCA so the axis labels always match the model instead of being typed by hand.
pca_var_pct <- get_eigenvalue(alkaloids_pca)$variance.percent

# Variable (alkaloid) map on PC1/PC2, coloured by contribution.
fviz_pca_var(alkaloids_pca, col.var = "contrib",
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE # Avoid text overlapping
) + labs(x = sprintf("PC1 (%.1f%%)", pca_var_pct[1]),
         y = sprintf("PC2 (%.1f%%)", pca_var_pct[2]),
         color = "Contribution")

Top 5 alkaloid contributions to sample differentiation in the first 2 PCs

# Bar charts of the five variables (alkaloids) that contribute most to PC1 ...
top5_pc1 <- alkaloids_pca %>%
  fviz_contrib(choice = "var", axes = 1, top = 5)
# ... and to PC2.
top5_pc2 <- alkaloids_pca %>%
  fviz_contrib(choice = "var", axes = 2, top = 5)

# patchwork: combine both charts into one figure.
top5_pc1 + top5_pc2

Scores plot

# Coordinates (scores) of every sample on the retained principal components.
sample_scores <- alkaloids_pca$ind$coord %>% data.frame()
# Name the columns PC1, PC2, ... for however many components were kept,
# instead of assuming there are exactly five.
names(sample_scores) <- paste0("PC", seq_len(ncol(sample_scores)))
# Attach the scores to the sample metadata and peak areas (same row order).
alkaloids_scores <- bind_cols(alkaloids_wide, sample_scores)
# Pairwise score plots. The PC columns are selected by name so the call keeps
# working if the number of alkaloid columns in alkaloids_wide changes.
# Points are not coloured by Specie in this matrix.
ggpairs(alkaloids_scores, columns = names(sample_scores),
        upper = list(continuous = "points", combo = "box_no_facet"),
        lower = list(continuous = "points", combo = "dot_no_facet"))

# Interactive PC1 vs PC2 scores plot, coloured by Specie. The hover text lists
# the sample's Specie, replicate and the peak area of every alkaloid.
# type/mode are stated explicitly (they are what plotly would infer) to avoid
# the "No trace type specified" / "No scatter mode specifed" messages.
alkaloids_scores %>% 
  plot_ly() %>% 
  add_trace(x = ~PC1, y = ~PC2, color = ~Specie,
            type = "scatter", mode = "markers",
            text = ~paste("Specie: ", Specie,
                          "<br>Replicate: ", Replicate,
                          "<br>Dehydrotomatine: ", Dehydrotomatine,
                          "<br>alpha-Tomatine: ", `alpha-Tomatine`,
                          "<br>Tomatidine: ", Tomatidine,
                          "<br>Hydroxytomatine_range: ", Hydroxytomatine_range,
                          # The four labels below were missing the ": "
                          # separator used by every other hover line.
                          "<br>Acetoxytomatine_II_rt4p4: ", Acetoxytomatine_II_rt4p4,
                          "<br>Acetoxytomatine_II_rt5p3: ", Acetoxytomatine_II_rt5p3,
                          "<br>Acetoxytomatine_II_rt5p4: ", Acetoxytomatine_II_rt5p4,
                          "<br>EsculeosideB_rt2p2: ", EsculeosideB_rt2p2,
                          "<br>EsculeosideB_rt2p3: ", EsculeosideB_rt2p3,
                          "<br>LycoEscu_rt3p3: ", LycoEscu_rt3p3,
                          "<br>LycoEscu_rt2p4: ", LycoEscu_rt2p4) )
# Record the R version and package versions used to render this notebook.
utils::sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS 13.4.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] GGally_2.1.2     patchwork_1.1.2  FactoMineR_2.8   factoextra_1.0.7
##  [5] DT_0.28          plotly_4.10.2    lubridate_1.9.2  forcats_1.0.0   
##  [9] stringr_1.5.0    dplyr_1.1.2      purrr_1.0.1      readr_2.1.4     
## [13] tidyr_1.3.0      tibble_3.2.1     ggplot2_3.4.2    tidyverse_2.0.0 
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.6           sass_0.4.7           bit64_4.0.5         
##  [4] vroom_1.6.3          jsonlite_1.8.7       viridisLite_0.4.2   
##  [7] carData_3.0-5        bslib_0.5.0          highr_0.10          
## [10] yaml_2.3.7           ggrepel_0.9.3        backports_1.4.1     
## [13] pillar_1.9.0         lattice_0.21-8       glue_1.6.2          
## [16] digest_0.6.33        ggsignif_0.6.4       RColorBrewer_1.1-3  
## [19] colorspace_2.1-0     htmltools_0.5.5      plyr_1.8.8          
## [22] pkgconfig_2.0.3      broom_1.0.5          xtable_1.8-4        
## [25] mvtnorm_1.2-2        scales_1.2.1         tzdb_0.4.0          
## [28] timechange_0.2.0     emmeans_1.8.7        car_3.1-2           
## [31] generics_0.1.3       farver_2.1.1         ggpubr_0.6.0        
## [34] ellipsis_0.3.2       cachem_1.0.8         withr_2.5.0         
## [37] lazyeval_0.2.2       cli_3.6.1            magrittr_2.0.3      
## [40] crayon_1.5.2         estimability_1.4.1   evaluate_0.21       
## [43] fansi_1.0.4          MASS_7.3-60          rstatix_0.7.2       
## [46] tools_4.2.0          data.table_1.14.8    hms_1.1.3           
## [49] lifecycle_1.0.3      munsell_0.5.0        cluster_2.1.4       
## [52] flashClust_1.01-2    compiler_4.2.0       jquerylib_0.1.4     
## [55] multcompView_0.1-9   rlang_1.1.1          grid_4.2.0          
## [58] rstudioapi_0.15.0    htmlwidgets_1.6.2    crosstalk_1.2.0     
## [61] leaps_3.1            labeling_0.4.2       rmarkdown_2.23      
## [64] gtable_0.3.3         abind_1.4-5          reshape_0.8.9       
## [67] R6_2.5.1             knitr_1.43           fastmap_1.1.1       
## [70] bit_4.0.5            utf8_1.2.3           stringi_1.7.12      
## [73] parallel_4.2.0       Rcpp_1.0.11          vctrs_0.6.3         
## [76] scatterplot3d_0.3-44 tidyselect_1.2.0     xfun_0.39